In [2]:
import os
import math
import numpy as np
from ctypes import CDLL, POINTER,  c_int32, c_int16, c_bool
from IPython.display import display, Audio, Markdown
from scipy.signal import spectrogram
import matplotlib.pyplot as plt

from pydub import AudioSegment

# Init C interface

# The shared library is looked up at build/libdsp.so under the PARENT of the
# notebook's working directory (dirname of the resolved cwd).
current_dir = os.path.dirname(os.path.realpath(os.path.abspath('')))
so_file = os.path.normpath(current_dir + '/build/libdsp.so')

c_interface = CDLL(so_file)

# Exported function name -> (argtypes, restype).
# Declaring these makes ctypes convert/validate arguments and return values
# explicitly instead of relying on its default int handling.
_c_signatures = {
    # Five 32-bit integer settings, no return value.
    'init_audio_accumulator': ((c_int32,) * 5, None),
    # One 1-D contiguous int16 numpy array, no return value.
    'accumulate_input': (
        [np.ctypeslib.ndpointer(dtype=np.int16, ndim=1, flags='CONTIGUOUS')],
        None,
    ),
    # No arguments; returns a pointer to int16 samples.
    'extract_output': ([], POINTER(c_int16)),
    # No arguments; returns a boolean flag.
    'is_output_ready': ([], c_bool),
}

for _c_name, (_c_argtypes, _c_restype) in _c_signatures.items():
    _c_function = getattr(c_interface, _c_name)
    _c_function.argtypes = _c_argtypes
    _c_function.restype = _c_restype

# Init audio accumulator

# Buffer sizes are expressed in frames; the processing code below multiplies
# them by the channel count to get interleaved sample counts.
NUM_INPUT_FRAMES = 1024                     # frames per buffer passed to accumulate_input
NUM_PROCESSING_FRAMES = NUM_INPUT_FRAMES * 2
NUM_OUTPUT_FRAMES = 2048                    # frames per buffer read from extract_output
NUM_DELAY_FRAMES = 2048                     # NOTE(review): not referenced in the visible code

SAMPLE_RATE = 22050                         # Hz; also the playback / spectrogram rate
OSAMP = 2                                   # presumably an oversampling factor -- confirm against the C side
NUM_CHANNELS = 2
SAMPLES_PER_SECOND = SAMPLE_RATE * NUM_CHANNELS   # interleaved samples per second

# Arguments are positional; the order must match the C declaration.
_accumulator_settings = (
    NUM_INPUT_FRAMES,
    NUM_OUTPUT_FRAMES,
    NUM_PROCESSING_FRAMES,
    OSAMP,
    SAMPLE_RATE,
)
c_interface.init_audio_accumulator(*_accumulator_settings)

# Display audio functions

def display_spectrogram(audio):
    """Plot a dB-scaled spectrogram of a single-channel signal.

    Args:
        audio: 1-D sample array, interpreted at SAMPLE_RATE.
    """
    freqs, times, power = spectrogram(
        audio,
        fs=SAMPLE_RATE,
        window='hann',
        nperseg=1024,
    )
    # The small offset keeps log10 finite where the power is exactly zero.
    power_db = 10 * np.log10(power + 1e-10)

    plt.pcolormesh(times, freqs, power_db, shading='gouraud')
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.colorbar(label='dB')
    plt.show()

def display_audio(name, audio_l, audio_r):
    """Show a heading, a two-channel audio player and a spectrogram.

    Args:
        name: Text rendered as a level-3 Markdown heading.
        audio_l: Samples for the left channel.
        audio_r: Samples for the right channel.
    """
    heading = Markdown('### ' + name)
    player = Audio(data=(audio_l, audio_r), rate=SAMPLE_RATE)
    display(heading, player)

    # The spectrogram is drawn from the average of both channels, computed in
    # float32 so the sum cannot overflow a narrow integer dtype.
    left = audio_l.astype(np.float32)
    right = audio_r.astype(np.float32)
    display_spectrogram((left + right) / 2)

# Audio processing function

def process_audio(audio_file_name):
    """Run one track through the C audio accumulator and display the results.

    Loads ``audio/<audio_file_name>.mp3``, converts it to interleaved 16-bit
    audio with NUM_CHANNELS channels at SAMPLE_RATE, keeps the excerpt between
    seconds 30 and 60, feeds it to the C library in buffers of
    NUM_INPUT_FRAMES frames, gathers every output buffer the library reports
    as ready, and finally displays the input and output (player + spectrogram).

    Args:
        audio_file_name: Track name without directory and without the
            ``.mp3`` extension.

    Raises:
        ValueError: If the track has no samples in the 30-60 s excerpt.
    """
    # Load audio (and interleave it like a real world audio signal).
    # The channel count and sample width are forced so the data always matches
    # the int16 ctypes signature and the de-interleaving stride used below.
    file_path = "audio/" + audio_file_name + ".mp3"
    audio = (
        AudioSegment.from_mp3(file_path)
        .set_frame_rate(SAMPLE_RATE)
        .set_channels(NUM_CHANNELS)
        .set_sample_width(2)
    )
    input_audio = np.array(audio.get_array_of_samples(), dtype=np.int16)
    start_sample = SAMPLES_PER_SECOND * 30
    end_sample = SAMPLES_PER_SECOND * 60
    input_audio = input_audio[start_sample:end_sample]
    if input_audio.size == 0:
        raise ValueError(
            "'" + file_path + "' is shorter than 30 seconds; nothing to process"
        )

    # Process each window of audio
    samples_per_input_buffer = NUM_INPUT_FRAMES * NUM_CHANNELS
    samples_per_output_buffer = NUM_OUTPUT_FRAMES * NUM_CHANNELS
    n_windows = len(input_audio) // samples_per_input_buffer  # trailing partial window is dropped
    output_chunks = []
    for i in range(n_windows):
        start = i * samples_per_input_buffer
        window = input_audio[start:start + samples_per_input_buffer]
        c_interface.accumulate_input(window)
        if c_interface.is_output_ready():
            output_audio_buffer = c_interface.extract_output()
            # as_array only wraps the C-owned memory; copy it right away so the
            # chunk stays valid if the library reuses that buffer later.
            chunk = np.ctypeslib.as_array(
                output_audio_buffer, shape=(samples_per_output_buffer,)
            ).copy()
            output_chunks.append(chunk)

    # Join once at the end instead of re-copying the whole output per window.
    if output_chunks:
        output_audio = np.concatenate(output_chunks)
    else:
        output_audio = np.empty((0,), dtype=np.int16)

    # Deinterleave audio
    input_l = input_audio[0::NUM_CHANNELS]
    input_r = input_audio[1::NUM_CHANNELS]
    output_l = output_audio[0::NUM_CHANNELS]
    output_r = output_audio[1::NUM_CHANNELS]

    # Display output. The input is shown with its real left/right channels,
    # i.e. the same stereo signal that was handed to the C library.
    display_audio("Input (stereo)", input_l, input_r)
    display_audio("Output (left)", output_l, output_l)
    display_audio("Output (right)", output_r, output_r)
    display_audio("Output (stereo)", output_l, output_r)

# Process several audio tracks
# Each entry is the base name of an .mp3 file inside the audio/ directory.
audio_file_names = [
    "Bad Bunny - NUEVAYoL",
    "Miley Cyrus - We Can't Stop",
    "SZA - BMF",
]

for audio_file_name in audio_file_names:
    # Level-2 heading per track, followed by that track's players and plots.
    track_heading = Markdown('## ' + audio_file_name)
    display(track_heading)
    process_audio(audio_file_name)

Bad Bunny - NUEVAYoL¶

Input (stereo)¶

Your browser does not support the audio element.

Output (left)¶

Your browser does not support the audio element.

Output (right)¶

Your browser does not support the audio element.

Output (stereo)¶

Your browser does not support the audio element.

Miley Cyrus - We Can't Stop¶

Input (stereo)¶

Your browser does not support the audio element.

Output (left)¶

Your browser does not support the audio element.

Output (right)¶

Your browser does not support the audio element.

Output (stereo)¶

Your browser does not support the audio element.

SZA - BMF¶

Input (stereo)¶

Your browser does not support the audio element.

Output (left)¶

Your browser does not support the audio element.

Output (right)¶

Your browser does not support the audio element.

Output (stereo)¶

Your browser does not support the audio element.
In [ ]: